# Chunk defaults: echo and evaluate code, hide messages and warnings.
knitr::opts_chunk$set(
  echo = TRUE,
  eval = TRUE,
  message = FALSE,
  warning = FALSE
)

# httr requests are configured to go through the proxy at 127.0.0.1:8668.
library(httr)
set_config(use_proxy(url = "127.0.0.1", port = 8668))

# Data wrangling, plotting and table packages (load order kept as-is,
# since later packages mask same-named functions from earlier ones).
library(dplyr)
library(ggplot2)
library(tidyverse)
library(ggthemes)
library(reshape2)
library(lubridate)
library(knitr)
library(readxl)
library(kableExtra)

# Font handling via showtext; run install.packages("showtext") once if needed.
library(showtext)
# Registers Arial from a fixed file path; adjust the path on other machines.
font_add("Arial", "/Library/Fonts/Arial.ttf")
showtext_auto()

# xaringan slide theme.
library(xaringanthemer)
style_mono_accent(
  base_color = "#FF7F50", # coral
  inverse_background_color = "#002B36", # very dark blue
  inverse_header_color = "#31b09e", # light aqua green
  inverse_text_color = "#FFFFFF", # white
  title_slide_background_color = "var(--base)",
  text_font_google = google_font("Kelly Slab"),
  header_font_google = google_font("Oleo Script")
)

# Strongly prefer fixed over scientific notation; print 4 significant digits.
options(scipen = 100, digits = 4)

Do Large Gathering Events Increase Covid 19 Cases?

# httr requests are configured to go through the proxy at 127.0.0.1:8668.
library(httr)
set_config(use_proxy(url = "127.0.0.1", port = 8668))

# Data wrangling, plotting and table packages (load order kept as-is,
# since later packages mask same-named functions from earlier ones).
library(dplyr)
library(ggplot2)
library(tidyverse)
library(ggthemes)
library(reshape2)
library(lubridate)
library(knitr)
library(readxl)
library(kableExtra)

# Font handling via showtext; run install.packages("showtext") once if needed.
library(showtext)
# Registers Arial from a fixed file path; adjust the path on other machines.
font_add("Arial", "/Library/Fonts/Arial.ttf")
showtext_auto()

# xaringan slide theme.
library(xaringanthemer)
style_mono_accent(
  base_color = "#FF7F50", # coral
  inverse_background_color = "#002B36", # very dark blue
  inverse_header_color = "#31b09e", # light aqua green
  inverse_text_color = "#FFFFFF", # white
  title_slide_background_color = "var(--base)",
  text_font_google = google_font("Kelly Slab"),
  header_font_google = google_font("Oleo Script")
)

# Strongly prefer fixed over scientific notation; print 4 significant digits.
options(scipen = 100, digits = 4)

# How the covid_us_county_level file was produced (kept for reference, not run):
#   dataFiles <- lapply(Sys.glob("*.csv"), read.csv)
#   covid_global <- bind_rows(dataFiles, .id = "column_label")
#   covid_us_county_level <- covid_global %>% filter(Country_Region == "US")
#   write.csv(x = covid_us_county_level, file = "covid_us_county_level")

# Load the pre-built county-level case table and the rally list from disk.
covid_us_county_level <- read.csv("covid_us_county_level")
trump_2020_rallies <- read.csv("trump 2020 presidential election rallies.csv")

PART 1


We prepared a dataset with information on 21 of Trump’s 2020 election rallies.

The dataset records each rally’s date, location, number of participants, and whether the event was held indoors or outdoors.


# Columns shown on the introduction slide: when and where each rally took
# place, whether it was indoors, and the attendance count.
trump_2020_rallies_intro <- trump_2020_rallies %>%
  select(Date, City, County, State, Indoors., People.Counting)

# Render as a striped, full-width HTML table inside a 300px scroll box.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
kable(trump_2020_rallies_intro, format = "html") %>%
  kable_styling(
    bootstrap_options = c("striped"),
    full_width = TRUE,
    font_size = 15
  ) %>%
  scroll_box(height = "300px")
Date City County State Indoors. People.Counting
6/20/20 Tulsa Tulsa Oklahoma yes 6200
6/23/20 Phoenix Maricopa Arizona yes 3000
8/17/20 Mankato Blue Earth Minnesota no 500
8/17/20 Oshkosh Winnebago Wisconsin no 1000
8/18/20 Yuma Yuma Arizona no NA
8/20/20 Old Forge Lackawanna Pennsylvania no NA
8/28/20 Londonberry Rockingham New Hampshire no 1000
9/3/20 Latrobe Westmoreland Pennsylvania no 7000
9/8/20 Winston-Salem Forsyth North Carolina no 15000
9/10/20 Freeland Saginaw Michigan no 10000
9/12/20 Minden Douglas Nevada no 5000
9/13/20 Henderson Clark Nevada yes NA
9/17/20 Mosinee Marathon Wisconsin no NA
9/18/20 Bemidji Beltrami Minnesota no NA
9/19/20 Fayetteville Cumberland North Carolina no 5600
9/21/20 Swanton Lucas Ohio no NA
9/21/20 Vandalia Vandalia Ohio no 10000
9/22/20 Pittsburgh Allegheny Pennsylvania no NA
9/24/20 Jacksonville Duval Florida no 15000
9/25/20 Newport News Newport News Virginia no 700
9/26/20 Middletown Dauphin Pennsylvania no NA


We also use the Novel Coronavirus (COVID-19) Cases dataset, provided by JHU CSSE, to get county-level daily confirmed case counts.


We create 22 Covid 19 daily confirmed cases trend plots for these 21 rallies, using these two datasets, to see whether large gathering events could speed up the spread of Covid 19.

We chose a two-month period (one month before the rally and one month after the rally) to study possible changes in the speed of Covid 19 spread. However, JHU’s Covid daily report only updates twice a week, and the two updates are adjacent. Thus, you can see points concentrating on certain dates. We therefore use a week as the unit of comparison (the line between four points spans a one-week period).


Methodology for Comparison

We compare the slope of the week in which a rally takes place with the slope of the week before the rally to see whether the rally changed the speed of Covid 19 spread. (If the rally falls on one of the JHU update dates (the points), we compare the slope after the points with the slope before the points.)

# Rows of the county-level case table for one county in one state.
#
# County names repeat across states, so every lookup is keyed on both
# Admin2 (county) and Province_State. `.env$` makes the comparison use the
# function arguments even if the table has columns with the same names.
#
# @param county_name Value to match in the Admin2 column.
# @param state_name  Value to match in the Province_State column.
# @return The matching rows of covid_us_county_level.
county_cases <- function(county_name, state_name) {
  covid_us_county_level %>%
    filter(
      Admin2 == .env$county_name,
      Province_State == .env$state_name
    )
}

# One data frame per rally county, in rally order.
Tulsa <- county_cases("Tulsa", "Oklahoma")
Maricopa <- county_cases("Maricopa", "Arizona")
Blue_Earth <- county_cases("Blue Earth", "Minnesota")
Winnebago <- county_cases("Winnebago", "Wisconsin")
Yuma <- county_cases("Yuma", "Arizona")
Lackawanna <- county_cases("Lackawanna", "Pennsylvania")
Rockingham <- county_cases("Rockingham", "New Hampshire")
Westmoreland <- county_cases("Westmoreland", "Pennsylvania")
Forsyth <- county_cases("Forsyth", "North Carolina")
Saginaw <- county_cases("Saginaw", "Michigan")
Clark <- county_cases("Clark", "Nevada")
Douglas <- county_cases("Douglas", "Nevada")
Marathon <- county_cases("Marathon", "Wisconsin")
Beltrami <- county_cases("Beltrami", "Minnesota")
Cumberland <- county_cases("Cumberland", "North Carolina")
Lucas <- county_cases("Lucas", "Ohio")
# Vandalia is a city, not a county, so Admin2 == "Vandalia" matches nothing.
# The city is in Montgomery County, Ohio, which is the county-level series.
Vandalia <- county_cases("Montgomery", "Ohio")
Allegheny <- county_cases("Allegheny", "Pennsylvania")
Duval <- county_cases("Duval", "Florida")
# NOTE(review): assumes Newport News (an independent city) is stored with
# Admin2 == "Newport News" and Province_State == "Virginia" - confirm.
Newport_News <- county_cases("Newport News", "Virginia")
Dauphin <- county_cases("Dauphin", "Pennsylvania")









PART 2 - 1


There are three possible comparison results; we prepared one typical example of each to illustrate our methodology.





Example – Spread Speed Increases

# Marathon County, Wisconsin (Mosinee rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Marathon_date <- Marathon %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep 2020-08-17 through 2020-10-17.
Marathon_date_narrow <- Marathon_date %>%
  filter(date >= as.Date("2020-08-17"), date <= as.Date("2020-10-17"))

# Confirmed counts over the window; the dashed coral line is at 2020-09-17.
ggplot(Marathon_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  ylim(0, 3000) +
  scale_x_date(
    date_breaks = "7 day",
    limits = as.Date(c("2020-08-17", "2020-10-17"))
  ) +
  labs(
    title = "Covid Daily Comfirmed Cases for Marathon(Mosinee), Wisconsin",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-17"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-09-17, the slope of the line between 2020-09-12 and 2020-09-19 is steeper than the slope between 2020-09-06 and 2020-09-13. This might imply that the spread of Covid 19 in Marathon(Mosinee), Wisconsin sped up after the rally.



Example – Spread Speed No Change

# Blue Earth County, Minnesota (Mankato rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Blue_Earth_date <- Blue_Earth %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep 2020-07-17 through 2020-09-17.
Blue_Earth_date_narrow <- Blue_Earth_date %>%
  filter(date >= as.Date("2020-07-17"), date <= as.Date("2020-09-17"))

# Confirmed counts over the window; the dashed coral line is at 2020-08-17.
ggplot(Blue_Earth_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "10 day",
    limits = as.Date(c("2020-07-17", "2020-09-17"))
  ) +
  ylim(500, 2000) +
  labs(
    title = "Covid Daily Comfirmed Cases for Blue Earth(Mankato), Minnesota",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-08-17"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-08-17, the slope of the line between 2020-08-13 and 2020-08-20 is very similar to the slope before. This might imply that the election rally had no obvious effect on Blue Earth(Mankato), Minnesota.



Example – Spread Speed Slows Down

# Saginaw County, Michigan (Freeland rally).
# The series is built from the Saginaw subset so that the data matches the
# county named in the plot title.
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Saginaw_date <- Saginaw %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep 2020-08-10 through 2020-10-10.
Saginaw_date_narrow <- Saginaw_date %>%
  filter(date >= as.Date("2020-08-10"), date <= as.Date("2020-10-10"))

# Confirmed counts over the window; the dashed coral line is at 2020-09-10.
# The y-axis is left to auto-scale: a fixed range would silently drop every
# point outside it.
# NOTE(review): choose a fixed ylim() once the Saginaw range is checked, and
# re-check the slide commentary against this plot.
ggplot(Saginaw_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "10 day",
    limits = as.Date(c("2020-08-10", "2020-10-10"))
  ) +
  labs(
    title = "Covid Daily Comfirmed Cases for Saginaw(Freeland), Michigan",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-10"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-09-10, the slope of the line between 2020-09-06 and 2020-09-13 is flatter than before. This might imply that the spread of Covid 19 slowed down after the rally in Saginaw(Freeland), Michigan.
















PART 2 - 2


Trend plots for the rest of the 21 rallies.

# Winnebago County, Wisconsin (Oshkosh rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Winnebago_date <- Winnebago %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep 2020-07-17 through 2020-09-17.
Winnebago_date_narrow <- Winnebago_date %>%
  filter(date >= as.Date("2020-07-17"), date <= as.Date("2020-09-17"))

# Confirmed counts over the window; the dashed coral line is at 2020-08-17.
ggplot(Winnebago_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "10 day",
    limits = as.Date(c("2020-07-17", "2020-09-17"))
  ) +
  ylim(500, 2000) +
  labs(
    title = "Covid Daily Comfirmed Cases for Winnebago(Oshkosh),Wisconsin",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-08-17"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-08-17, the slope of the line between 2020-08-13 and 2020-08-20 is flatter than before. This might imply that the spread of Covid 19 slowed down after the rally in Winnebago(Oshkosh), Wisconsin.

# Tulsa County, Oklahoma (Tulsa rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Tulsa_date <- Tulsa %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep one month either side of the rally: 2020-05-20 through 2020-07-20.
Tulsa_date_narrow <- Tulsa_date %>%
  filter(date >= as.Date("2020-05-20"), date <= as.Date("2020-07-20"))

# Confirmed counts over the window; the dashed coral line is at 2020-06-20.
# The x-axis limits use the same window as the filter, so no point kept by
# the filter is dropped by the scale.
ggplot(Tulsa_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "10 day",
    limits = as.Date(c("2020-05-20", "2020-07-20"))
  ) +
  scale_y_continuous(breaks = seq(0, 12000, 1000)) +
  labs(
    title = "Covid Daily Comfirmed Cases for Tulsa, Oklahoma",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-06-20"),
    linetype = "dashed",
    size = 0.3,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-06-20, the slope of the line between 2020-06-20 and 2020-06-27 is much steeper than before. This might imply that the election rally sped up the spread of Covid 19 in Tulsa, Oklahoma.

# Maricopa County, Arizona (Phoenix rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Maricopa_date <- Maricopa %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep 2020-05-23 through 2020-07-23.
Maricopa_date_narrow <- Maricopa_date %>%
  filter(date >= as.Date("2020-05-23"), date <= as.Date("2020-07-23"))

# Confirmed counts over the window; the dashed coral line is at 2020-06-23.
ggplot(Maricopa_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "10 day",
    limits = as.Date(c("2020-05-23", "2020-07-23"))
  ) +
  scale_y_continuous(breaks = seq(0, 100000, 20000)) +
  labs(
    title = "Covid Daily Comfirmed Cases for Maricopa(Phoenix), Arizona",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-06-23"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-06-23, the slope of the line between 2020-06-20 and 2020-06-27 is steeper than before. This might imply that the election rally sped up the spread of Covid 19 in Maricopa(Phoenix), Arizona. However, the effect is not very obvious.

# Yuma County, Arizona (Yuma rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Yuma_date <- Yuma %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep 2020-07-18 through 2020-09-18.
Yuma_date_narrow <- Yuma_date %>%
  filter(date >= as.Date("2020-07-18"), date <= as.Date("2020-09-18"))

# Confirmed counts over the window; the dashed coral line is at 2020-08-18.
ggplot(Yuma_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "10 day",
    limits = as.Date(c("2020-07-18", "2020-09-18"))
  ) +
  ylim(8000, 14000) +
  labs(
    title = "Covid Daily Comfirmed Cases for Yuma, Arizona",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-08-18"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-08-18, the slope of the line between 2020-08-13 and 2020-08-20 is slightly steeper than before. This might imply that the spread of Covid 19 in Yuma, Arizona sped up slightly after the rally.

# Lackawanna County, Pennsylvania (Old Forge rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Lackawanna_date <- Lackawanna %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep 2020-07-20 through 2020-09-20.
Lackawanna_date_narrow <- Lackawanna_date %>%
  filter(date >= as.Date("2020-07-20"), date <= as.Date("2020-09-20"))

# Confirmed counts over the window; the dashed coral line is at 2020-08-20.
ggplot(Lackawanna_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "10 day",
    limits = as.Date(c("2020-07-20", "2020-09-20"))
  ) +
  ylim(1500, 2500) +
  labs(
    title = "Covid Daily Comfirmed Cases for Lackawanna(Old Forge), Pennsylvania",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-08-20"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-08-20, the slope of the line between 2020-08-19 and 2020-08-26 is steeper than the slope between 2020-08-13 and 2020-08-20. This might imply that the spread of Covid 19 in Lackawanna(Old Forge), Pennsylvania sped up after the election rally.

# Rockingham County, New Hampshire (Londonberry rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Rockingham_date <- Rockingham %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep 2020-07-28 through 2020-09-28.
Rockingham_date_narrow <- Rockingham_date %>%
  filter(date >= as.Date("2020-07-28"), date <= as.Date("2020-09-28"))

# Confirmed counts over the window; the dashed coral line is at 2020-08-28.
ggplot(Rockingham_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "10 day",
    limits = as.Date(c("2020-07-28", "2020-09-28"))
  ) +
  ylim(1600, 2050) +
  labs(
    title = "Covid Daily Comfirmed Cases for Rockingham(Londonberry),New Hampshire",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-08-28"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-08-28, the slope of the line between 2020-08-25 and 2020-09-01 is almost the same as before. This might imply that the spread of Covid 19 in Rockingham(Londonberry), New Hampshire had no obvious change after the election rally.

# Westmoreland County, Pennsylvania (Latrobe rally, 9/3/20 in the rally table).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Westmoreland_date <- Westmoreland %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep one month either side of the rally: 2020-08-03 through 2020-10-03.
Westmoreland_date_narrow <- Westmoreland_date %>%
  filter(date >= as.Date("2020-08-03"), date <= as.Date("2020-10-03"))

# Confirmed counts over the window. The dashed coral line marks the rally
# date, 2020-09-03, which is also the centre of the filter window.
ggplot(Westmoreland_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "10 day",
    limits = as.Date(c("2020-08-03", "2020-10-03"))
  ) +
  ylim(1400, 2400) +
  labs(
    title = "Covid Daily Comfirmed Cases for Westmoreland(Latrobe),Pennsylvania ",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-03"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-09-03, the slope of the line between 2020-09-06 and 2020-09-13 is almost the same as the slope between 2020-08-25 and 2020-09-01. This might imply that the spread of Covid 19 in Westmoreland(Latrobe), Pennsylvania had no obvious change after the election rally.

# Forsyth County, North Carolina (Winston-Salem rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Forsyth_date <- Forsyth %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep 2020-08-08 through 2020-10-08.
Forsyth_date_narrow <- Forsyth_date %>%
  filter(date >= as.Date("2020-08-08"), date <= as.Date("2020-10-08"))

# Confirmed counts over the window; the dashed coral line is at 2020-09-08.
ggplot(Forsyth_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "10 day",
    limits = as.Date(c("2020-08-08", "2020-10-08"))
  ) +
  ylim(5000, 8000) +
  labs(
    title = "Covid Daily Comfirmed Cases for Forsyth(Winston-Salem), North Carolina",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-08"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-09-08, the slope of the line between 2020-09-06 and 2020-09-13 is flatter than before. This might imply that the spread of Covid 19 slowed down after the rally in Forsyth(Winston-Salem), North Carolina.

# Douglas County, Nevada (Minden rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Douglas_date <- Douglas %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep 2020-08-12 through 2020-10-12.
Douglas_date_narrow <- Douglas_date %>%
  filter(date >= as.Date("2020-08-12"), date <= as.Date("2020-10-12"))

# Confirmed counts over the window; the dashed coral line is at 2020-09-12.
ggplot(Douglas_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  ylim(200, 350) +
  scale_x_date(
    date_breaks = "10 day",
    limits = as.Date(c("2020-08-12", "2020-10-12"))
  ) +
  labs(
    title = "Covid Daily Comfirmed Cases for Douglas(Minden), Nevada",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-12"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-09-12, the slope of the line between 2020-09-12 and 2020-09-19 is steeper than the slope between 2020-09-06 and 2020-09-13. This might imply that the spread of Covid 19 in Douglas(Minden), Nevada sped up after the rally.

# Clark County, Nevada (Henderson rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Clark_date <- Clark %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep one month either side of the rally: 2020-08-13 through 2020-10-13.
Clark_date_narrow <- Clark_date %>%
  filter(date >= as.Date("2020-08-13"), date <= as.Date("2020-10-13"))

# Confirmed counts over the window; the dashed coral line is at 2020-09-13.
# The x-axis limits use the same window as the filter, so points from the
# last days of the window are not dropped by the scale.
ggplot(Clark_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  ylim(50000, 74000) +
  scale_x_date(
    date_breaks = "10 day",
    limits = as.Date(c("2020-08-13", "2020-10-13"))
  ) +
  labs(
    title = "Covid Daily Comfirmed Cases for Clark(Henderson), Nevada",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-13"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-09-13, the slope of the line between 2020-09-12 and 2020-09-19 is almost the same as the slope between 2020-09-06 and 2020-09-13. This might imply that the spread of Covid 19 in Clark(Henderson), Nevada had no obvious change after the election rally.

# Beltrami County, Minnesota (Bemidji rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Beltrami_date <- Beltrami %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep 2020-08-18 through 2020-10-18.
Beltrami_date_narrow <- Beltrami_date %>%
  filter(date >= as.Date("2020-08-18"), date <= as.Date("2020-10-18"))

# Confirmed counts over the window; the dashed coral line is at 2020-09-18.
ggplot(Beltrami_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "7 day",
    limits = as.Date(c("2020-08-18", "2020-10-18"))
  ) +
  ylim(250, 750) +
  labs(
    title = "Covid Daily Comfirmed Cases for Beltrami(Bemidji), Minnesota",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-18"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-09-18, the slope of the line between 2020-09-18 and 2020-09-25 is almost the same as the slope between 2020-09-12 and 2020-09-19. This might imply that the spread of Covid 19 in Beltrami(Bemidji), Minnesota had no obvious change after the election rally.

# Cumberland County, North Carolina (Fayetteville rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Cumberland_date <- Cumberland %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep one month either side of the rally: 2020-08-19 through 2020-10-19.
Cumberland_date_narrow <- Cumberland_date %>%
  filter(date >= as.Date("2020-08-19"), date <= as.Date("2020-10-19"))

# Confirmed counts over the window; the dashed coral line is at 2020-09-19.
# The x-axis limits use the same window as the filter, so points from the
# last days of the window are not dropped by the scale.
ggplot(Cumberland_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "7 day",
    limits = as.Date(c("2020-08-19", "2020-10-19"))
  ) +
  ylim(3000, 7000) +
  labs(
    title = "Covid Daily Comfirmed Cases for Cumberland(Fayetteville), North Carolina",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-19"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-09-19, the slope of the line between 2020-09-18 and 2020-09-25 is almost the same as the slope between 2020-09-12 and 2020-09-19. This might imply that the spread of Covid 19 in Cumberland(Fayetteville), North Carolina had no obvious change after the election rally.

# Lucas County, Ohio (Swanton rally).
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Lucas_date <- Lucas %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep one month either side of the rally: 2020-08-21 through 2020-10-21.
Lucas_date_narrow <- Lucas_date %>%
  filter(date >= as.Date("2020-08-21"), date <= as.Date("2020-10-21"))

# Confirmed counts over the window; the dashed coral line is at 2020-09-21.
# The x-axis limits use the same window as the filter, so points from the
# last days of the window are not dropped by the scale.
ggplot(Lucas_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "7 day",
    limits = as.Date(c("2020-08-21", "2020-10-21"))
  ) +
  ylim(6000, 8500) +
  labs(
    title = "Covid Daily Comfirmed Cases for Lucas(Swanton),Ohio",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-21"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the above graph, we can see that after the rally on 2020-09-21, the slope of the line between 2020-09-18 and 2020-09-25 is almost the same as the slope between 2020-09-12 and 2020-09-19. This might imply that the spread of Covid 19 in Lucas(Swanton), Ohio had no obvious change after the election rally.

# Vandalia, Ohio rally.
# Vandalia is a city in Montgomery County, Ohio, and the case table is keyed
# by county (Admin2), so the series is taken from Montgomery County directly
# rather than from the Lucas County (Swanton) data.
# Split Last_Update into date and time parts, then parse the date part,
# accepting either m/d/y or y-m-d input.
Vandalia_date <- covid_us_county_level %>%
  filter(Admin2 == "Montgomery", Province_State == "Ohio") %>%
  separate(Last_Update, c("date", "time"), sep = " ") %>%
  mutate(
    date = as.Date(
      parse_date_time(date, orders = c("%m/%d/%y", "%y-%m-%d"))
    )
  )

# Keep one month either side of the rally: 2020-08-21 through 2020-10-21.
Vandalia_date_narrow <- Vandalia_date %>%
  filter(date >= as.Date("2020-08-21"), date <= as.Date("2020-10-21"))

# Confirmed counts over the window; the dashed coral line is at 2020-09-21.
# The x-axis limits use the same window as the filter. The y-axis is left to
# auto-scale: a fixed range would silently drop every point outside it.
# NOTE(review): choose a fixed ylim() once the Montgomery County range is
# checked, and re-check the slide commentary against this plot.
ggplot(Vandalia_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(colour = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(colour = "steelblue") +
  scale_x_date(
    date_breaks = "7 day",
    limits = as.Date(c("2020-08-21", "2020-10-21"))
  ) +
  labs(
    title = "Covid Daily Comfirmed Cases for Vandalia, Ohio",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-21"),
    linetype = "dashed",
    size = 0.5,
    colour = "coral"
  ) +
  theme_xaringan() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.caption = element_text(size = 11, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the graph above, we can see that after the rally on 2020-09-21, the slope of the line between 2020-09-18 and 2020-09-25 is almost the same as between 2020-09-12 and 2020-09-19. This may imply that the spread of Covid 19 in Vandalia, Ohio did not change noticeably after the election rally.

# Build the dated series for Allegheny (Pittsburgh), Pennsylvania and plot
# `Confirmed` over time, with a dashed coral line at the rally date
# (2020-09-22).
Allegheny_date <- Allegheny %>%
  separate(Last_Update, c("date", "time"), sep = " ")
# `Last_Update` dates are parsed with two candidate formats (m/d/y and y-m-d).
Allegheny_date$date <- as.Date(
  parse_date_time(Allegheny_date$date, orders = c("%m/%d/%y", "%y-%m-%d"))
)
# Keep one month either side of the rally.
Allegheny_date_narrow <- Allegheny_date %>%
  filter(date >= as.Date("2020-08-22") & date <= as.Date("2020-10-22"))

ggplot(Allegheny_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(col = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(color = "steelblue") +
  # Weekly tick marks across the same window as the filter above.
  scale_x_date(
    date_breaks = "7 day",
    limits = as.Date(c("2020-08-22", "2020-10-22"))
  ) +
  ylim(c(10000, 15000)) +
  labs(
    title = "Covid Daily Confirmed Cases for Allegheny(Pittsburgh), Pennsylvania",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-22"),
    linetype = "dashed", size = 0.5, col = "coral"
  ) +
  theme_xaringan() +
  # Local overrides on top of the slide theme.
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.title.x = element_blank(),
    plot.caption = element_text(size = 11, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the graph above, we can see that after the rally on 2020-09-22, the slope of the line between 2020-09-18 and 2020-09-25 is almost the same as between 2020-09-12 and 2020-09-19. This may imply that the spread of Covid 19 in Allegheny (Pittsburgh), Pennsylvania did not change noticeably after the election rally.

# Build the dated series for Duval (Jacksonville), Florida and plot
# `Confirmed` over time, with a dashed coral line at the rally date
# (2020-09-24).
Duval_date <- Duval %>%
  separate(Last_Update, c("date", "time"), sep = " ")
# `Last_Update` dates are parsed with two candidate formats (m/d/y and y-m-d).
Duval_date$date <- as.Date(
  parse_date_time(Duval_date$date, orders = c("%m/%d/%y", "%y-%m-%d"))
)
# Keep one month either side of the rally.
Duval_date_narrow <- Duval_date %>%
  filter(date >= as.Date("2020-08-24") & date <= as.Date("2020-10-24"))

ggplot(Duval_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(col = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(color = "steelblue") +
  # Weekly tick marks across the same window as the filter above.
  scale_x_date(
    date_breaks = "7 day",
    limits = as.Date(c("2020-08-24", "2020-10-24"))
  ) +
  ylim(c(25000, 35000)) +
  labs(
    title = "Covid Daily Confirmed Cases for Duval(Jacksonville), Florida",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-24"),
    linetype = "dashed", size = 0.5, col = "coral"
  ) +
  theme_xaringan() +
  # Local overrides on top of the slide theme.
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.title.x = element_blank(),
    plot.caption = element_text(size = 11, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the graph above, we can see that after the rally on 2020-09-24, the slope of the line between 2020-09-24 and 2020-10-01 is almost the same as between 2020-09-18 and 2020-09-25. This may imply that the spread of Covid 19 in Duval (Jacksonville), Florida did not change noticeably after the election rally.

# Build the dated series for Newport News, Virginia and plot `Confirmed`
# over time, with a dashed coral line at the rally date (2020-09-25).
Newport_News_date <- Newport_News %>%
  separate(Last_Update, c("date", "time"), sep = " ")
# `Last_Update` dates are parsed with two candidate formats (m/d/y and y-m-d).
Newport_News_date$date <- as.Date(
  parse_date_time(Newport_News_date$date, orders = c("%m/%d/%y", "%y-%m-%d"))
)
# Keep one month either side of the rally.
Newport_News_date_narrow <- Newport_News_date %>%
  filter(date >= as.Date("2020-08-25") & date <= as.Date("2020-10-25"))

ggplot(Newport_News_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(col = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(color = "steelblue") +
  # Weekly tick marks across the same window as the filter above.
  scale_x_date(
    date_breaks = "7 day",
    limits = as.Date(c("2020-08-25", "2020-10-25"))
  ) +
  ylim(c(2000, 3500)) +
  labs(
    title = "Covid Daily Confirmed Cases for Newport News, Virginia",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-25"),
    linetype = "dashed", size = 0.5, col = "coral"
  ) +
  theme_xaringan() +
  # Local overrides on top of the slide theme.
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.title.x = element_blank(),
    plot.caption = element_text(size = 11, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the graph above, we can see that after the rally on 2020-09-25, the slope of the line between 2020-09-24 and 2020-10-01 is almost the same as between 2020-09-18 and 2020-09-25. This may imply that the spread of Covid 19 in Newport News, Virginia did not change noticeably after the election rally.

# Build the dated series for Dauphin (Middletown), Pennsylvania and plot
# `Confirmed` over time, with a dashed coral line at the rally date
# (2020-09-26).
Dauphin_date <- Dauphin %>%
  separate(Last_Update, c("date", "time"), sep = " ")
# `Last_Update` dates are parsed with two candidate formats (m/d/y and y-m-d).
Dauphin_date$date <- as.Date(
  parse_date_time(Dauphin_date$date, orders = c("%m/%d/%y", "%y-%m-%d"))
)
# Keep one month either side of the rally.
Dauphin_date_narrow <- Dauphin_date %>%
  filter(date >= as.Date("2020-08-26") & date <= as.Date("2020-10-26"))

ggplot(Dauphin_date_narrow, aes(x = date, y = Confirmed)) +
  geom_line(col = "lightsteelblue2", size = 1.5, arrow = arrow()) +
  geom_point(color = "steelblue") +
  ylim(c(3000, 4500)) +
  # Weekly tick marks. The axis limits end at 2020-10-22, earlier than the
  # 2020-10-26 filter bound, so rows after 2020-10-22 are not drawn.
  scale_x_date(
    date_breaks = "7 day",
    limits = as.Date(c("2020-08-26", "2020-10-22"))
  ) +
  labs(
    title = "Covid Daily Confirmed Cases for Dauphin(Middletown), Pennsylvania",
    caption = "Source: Novel Coronavirus (COVID-19) Cases, provided by JHU CSSE",
    y = "Confirmed Covid Cases Per Day"
  ) +
  geom_vline(
    xintercept = as.Date("2020-09-26"),
    linetype = "dashed", size = 0.5, col = "coral"
  ) +
  theme_xaringan() +
  # Local overrides on top of the slide theme.
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.text.x = element_text(size = 8, angle = -45, hjust = 0.1, vjust = 1),
    axis.text.y = element_text(size = 10, face = "bold"),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.title.x = element_blank(),
    plot.caption = element_text(size = 11, face = "bold"),
    panel.grid.major.x = element_blank()
  )

From the graph above, we can see that after the rally on 2020-09-26, the slope of the line between 2020-09-24 and 2020-10-01 is almost the same as between 2020-09-18 and 2020-09-25. This may imply that the spread of Covid 19 in Dauphin (Middletown), Pennsylvania did not change noticeably after the election rally.









Part 3


Using the same methodology introduced before, we prepared a Covid 19 Spread Speed Change Summary table to show all the comparison results.


Covid 19 Spread Speed Change Summary table

# Summary table: one row per rally with its date, location, indoor flag and
# the assessed change in Covid spread after the rally.
trump_2020_rallies_summary <- trump_2020_rallies %>%
  select(Date, City, State, Indoors., Covid.Spread.After.Rally)

# Render as a striped, full-width HTML table inside a 300px scroll box so the
# whole table fits on a single slide.
kable(trump_2020_rallies_summary, format = "html") %>%
  kable_styling(
    bootstrap_options = c("striped"),
    full_width = TRUE,
    font_size = 15
  ) %>%
  scroll_box(height = "300px")
Date City State Indoors. Covid.Spread.After.Rally
6/20/20 Tulsa Oklahoma yes Speed up
6/23/20 Phoenix Arizona yes Speed up
8/17/20 Mankato Minnesota no No effect
8/17/20 Oshkosh Wisconsin no Slow down
8/18/20 Yuma Arizona no Speed up
8/20/20 Old Forge Pennsylvania no Speed up
8/28/20 Londonberry New Hampshire no No effect
9/3/20 Latrobe Pennsylvania no No effect
9/8/20 Winston-Salem North Carolina no Slow down
9/10/20 Freeland Michigan no Slow down
9/12/20 Minden Nevada no Speed up
9/13/20 Henderson Nevada yes Speed up
9/17/20 Mosinee Wisconsin no Speed up
9/18/20 Bemidji Minnesota no Speed up
9/19/20 Fayetteville North Carolina no No effect
9/21/20 Swanton Ohio no No effect
9/21/20 Vandalia Ohio no No effect
9/22/20 Pittsburgh Pennsylvania no Slow down
9/24/20 Jacksonville Florida no No effect
9/25/20 Newport News Virginia no No effect
9/26/20 Middletown Pennsylvania no No effect

From the table above, we can see that, among all the rallies, only 38.1% (8/21) of the cities might have seen an increase in Covid 19 spread speed. Thus, it is hard to conclude that rallies have a negative effect on Covid 19 spread.





Part 4


#### Does Indoor or Outdoor Matter?



# 100%-stacked bar chart comparing the mix of post-rally outcomes
# (`Covid.Spread.After.Rally`) between the two values of `Indoors.`.
ggplot(trump_2020_rallies, aes(x = Indoors., fill = Covid.Spread.After.Rally)) +
  # position = "fill" rescales each bar to a total height of 1.
  geom_bar(position = "fill", width = 0.5) +
  # Labels are applied positionally.
  # NOTE(review): assumes `Indoors.` orders as "no", "yes" -- confirm.
  scale_x_discrete(labels = c("Outdoor Rallies", "Indoor Rallies")) +
  labs(
    title = "The composition of Covid Spread Speed Change after Trump's Indoor and Outdoor Election Rallies",
    y = "Percentage"
  ) +
  # Colours and labels are also positional.
  # NOTE(review): assumes the outcome levels order as "No effect",
  # "Slow down", "Speed up" -- confirm.
  scale_fill_manual(
    values = c("lightsteelblue", "lightyellow", "coral"),
    labels = c("No Change", "Slow down", "Speed Up")
  ) +
  guides(fill = guide_legend(title = "Covid 19 spread Speed Change after Rally")) +
  theme_pander() +
  theme(
    axis.text.y = element_text(face = "bold", size = 10),
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    axis.text.x = element_text(size = 10, face = "bold"),
    axis.title.y = element_text(size = 10, face = "bold"),
    axis.title.x = element_blank(),
    legend.position = "top"
  )

From the graph above, we can see that after every indoor rally, the spread of Covid 19 sped up. However, among the outdoor rallies the situation is much better: in roughly 72% (13 of 18) of the cities, the Covid 19 spread speed remained the same or even slowed down.